/* =============================================================================
 * Decomposing changes in the college premium (U.S. and China)
 * ===========================================================================*/


/*******************************************************************************

									US
		
*******************************************************************************/	

* ---- U.S. sample construction ------------------------------------------------
* Forward slashes are used in file paths: Stata accepts them on every platform,
* whereas a backslash separator only works on Windows.
use "data/US.dta", clear

* Remove flagged outliers.
* NOTE(review): a missing value of outlier also evaluates as true here, so such
* rows are dropped too - confirm this is intended.
drop if outlier

keep if male == 1

* Education groups: educ codes 72/73 form the non-college group (0) and codes
* 110/111 the college group (1); every other code is excluded.
gen college = .
replace college = 0 if inlist(educ, 72, 73)
replace college = 1 if inlist(educ, 110, 111)
drop if missing(college)

* Potential experience: the smaller of (age - eduyrs - 6) and the years elapsed
* since age 18 (non-college) or age 22 (college).
gen wexp = .
replace wexp = min(age - eduyrs - 6, age - 18) if college == 0
replace wexp = min(age - eduyrs - 6, age - 22) if college == 1

* Keep birth cohorts 1935-1984 (rows with missing birth year are dropped as well).
rename ybirth byear
keep if inrange(byear, 1935, 1984)

* Harmonise variable names with the rest of the script.
rename lnearnings lwage
rename asecwt perwt

keep if inrange(age, 25, 54)		// prime age workers

/*******************************************************************************
discretize
*******************************************************************************/
* 5-year experience bins with left endpoints 0, 5, ..., 35; values outside the
* at() range receive a missing group and are dropped.
egen wexp_group = cut(wexp), at(0(5)40)
drop if missing(wexp_group)
* 5-year birth cohorts, labelled by the first year of the bin.
gen byear_group = 5*floor(byear/5)

* Year-by-education aggregates: mean log earnings (lnw) and the count of
* non-missing lwage (L), both computed under the perwt weights, then reshaped
* to one row per year with suffix 0 (non-college) / 1 (college).
* The path uses forward slashes so the file is written correctly on any OS.
preserve
collapse (mean) lnw = lwage (count) L = lwage [pw = perwt], by(year college)
reshape wide lnw L, i(year) j(college)
save "temp/wage_us_byedu.dta", replace
restore


* Cell sizes: count of non-missing lwage (under the perwt weights) for every
* year x experience group x cohort group x education cell.
collapse (count) obs = lwage [pw = perwt], by(year wexp_group byear_group college)

* Turn the cell sizes into shares within each (year, education) pair, discard
* the helper counts, and spread the shares into share0 / share1.
by year college, sort: egen obs_total = total(obs)
gen share = obs / obs_total
drop obs obs_total
reshape wide share, i(year wexp_group byear_group) j(college)

/*******************************************************************************
merge the estimated experience and cohort profiles
*******************************************************************************/
* Attach the estimated experience profiles, one file per education group
* (edu0, then edu1). keep(match) retains only cells found in both datasets and
* nogenerate suppresses the _merge indicator.
* Paths use forward slashes so they resolve on every platform.
gen plot_wexp = wexp_group
forvalues e = 0/1 {
	merge m:1 plot_wexp using "temp/us_exp_profile_edu`e'.dta", keep(match) nogenerate
}

* Attach the estimated cohort profiles in the same way, keyed on the cohort bin.
gen plot_coh = byear_group
forvalues e = 0/1 {
	merge m:1 plot_coh using "temp/us_coh_profile_edu`e'.dta", keep(match) nogenerate
}

* Cell-level contribution: experience profile x cohort profile x cell share,
* separately for each education group.
sort year wexp_group byear_group
forvalues i = 0/1 {
	gen h_wgt_`i' = profile_wexp_us_`i' * profile_coh_us_`i' * share`i'
}

* Sum the contributions within year; the results are stored as h0 and h1.
collapse (sum) h_wgt_*, by(year)
rename h_wgt_* h*

save "temp/construct_h_us_byedu.dta", replace



/*******************************************************************************

									China 
		
*******************************************************************************/		


* ---- China sample construction -----------------------------------------------
* Forward slashes are used in file paths: Stata accepts them on every platform,
* whereas a backslash separator only works on Windows.
use "data/China.dta", clear

* Remove flagged outliers.
* NOTE(review): a missing value of outlier also evaluates as true here, so such
* rows are dropped too - confirm this is intended.
drop if outlier
keep if male == 1

* Education groups: exactly 12 years of schooling is the non-college group (0),
* exactly 16 years the college group (1); all other values are excluded.
gen college = .
replace college = 0 if eduyrs == 12
replace college = 1 if eduyrs == 16
drop if missing(college)

* Potential experience: the smaller of (age - eduyrs - 6) and the years elapsed
* since age 18 (non-college) or age 22 (college).
gen wexp = .
replace wexp = min(age - eduyrs - 6, age - 18) if college == 0
replace wexp = min(age - eduyrs - 6, age - 22) if college == 1

* Keep birth cohorts 1935-1984 (rows with missing birth year are dropped as well).
rename ybirth byear
keep if inrange(byear, 1935, 1984)

rename lnearnings lwage

keep if inrange(age, 25, 54)		// prime age workers

/*******************************************************************************
discretize
*******************************************************************************/
* 5-year experience bins with left endpoints 0, 5, ..., 35; values outside the
* at() range receive a missing group and are dropped.
egen wexp_group = cut(wexp), at(0(5)40)
drop if missing(wexp_group)
* 5-year birth cohorts, labelled by the first year of the bin.
gen byear_group = 5*floor(byear/5)

* Year-by-education aggregates (unweighted): mean log earnings (lnw) and the
* count of non-missing lwage (L), reshaped to one row per year with suffix
* 0 (non-college) / 1 (college).
preserve
collapse (mean) lnw = lwage (count) L = lwage, by(year college)
reshape wide lnw L, i(year) j(college)
/*
Education coding changes in the UHS:
prior to 1992, dazhuan is pooled with benke,
so the pre-1992 college figures are adjusted to be consistent.
NOTE(review): the external do-file is not visible here; it presumably adjusts
the college wage series. The factor .3576 applied to the college count L1 is
presumably the benke share of the pooled group - confirm both.
*/
do "temp/adjust_benke_prior92.do"
replace L1 = L1*.3576 if year <= 1991
* Forward slashes, matching the do-file path above, so the save works on any OS.
save "temp/wage_cn_byedu.dta", replace
restore

* Cell sizes: unweighted count of non-missing lwage for every
* year x experience group x cohort group x education cell.
collapse (count) obs = lwage, by(year wexp_group byear_group college)

* Turn the cell sizes into shares within each (year, education) pair, discard
* the helper counts, and spread the shares into share0 / share1.
by year college, sort: egen obs_total = total(obs)
gen share = obs / obs_total
drop obs obs_total
reshape wide share, i(year wexp_group byear_group) j(college)


/*******************************************************************************
merge the estimated experience and cohort profiles
*******************************************************************************/
* Attach the estimated experience profiles, one file per education group
* (edu0, then edu1). keep(match) retains only cells found in both datasets and
* nogenerate suppresses the _merge indicator.
* Paths use forward slashes so they resolve on every platform.
gen plot_wexp = wexp_group
forvalues e = 0/1 {
	merge m:1 plot_wexp using "temp/cn_exp_profile_edu`e'.dta", keep(match) nogenerate
}

* Attach the estimated cohort profiles in the same way, keyed on the cohort bin.
gen plot_coh = byear_group
forvalues e = 0/1 {
	merge m:1 plot_coh using "temp/cn_coh_profile_edu`e'.dta", keep(match) nogenerate
}

* Cell-level contribution: experience profile x cohort profile x cell share,
* separately for each education group.
sort year wexp_group byear_group
forvalues i = 0/1 {
	gen h_wgt_`i' = profile_wexp_cn_`i' * profile_coh_cn_`i' * share`i'
}

* Sum the contributions within year; the results are stored as h0 and h1.
collapse (sum) h_wgt_*, by(year)
rename h_wgt_* h*

save "temp/construct_h_cn_byedu.dta", replace


/*******************************************************************************

									SBTC 
		
*******************************************************************************/
* baseline value from Bowlus-Lochner-Robinson-Suleymanoglu
global sigma = 3.8

/*******************************************************************************
									U.S.
*******************************************************************************/
* Combine, by year: mean log earnings and counts (lnw0/1, L0/1), the constructed
* h0/h1 series, and the estimated year profiles for each education group.
* Unmatched rows from either side are retained, as before.
use "temp/wage_us_byedu.dta", clear
merge 1:1 year using "temp/construct_h_us_byedu.dta", nogenerate
gen plot_year = year
* The full variable name is requested so the merge does not depend on
* variable-name abbreviation being enabled.
forvalues e = 0/1 {
	merge 1:1 plot_year using "temp/us_year_profile_edu`e'.dta", keepusing(profile_year_us_`e') nogenerate
}

* Logs of the year profile (p), the count (L) and h for each education group.
forvalues e = 0/1 {
	gen lnp`e' = log(profile_year_us_`e')
	gen lnL`e' = log(L`e')
	gen lnh`e' = log(h`e')
}

* College minus non-college log gap for each component.
foreach v in w p h L {
	gen diff_ln`v' = ln`v'1 - ln`v'0
}

* Express each gap relative to its value in the first row. Sort explicitly so
* that the first row is the earliest year regardless of the order left by the
* merges (rows with a missing year sort last).
sort year
foreach v in w p h L {
	gen ddln`v' = diff_ln`v' - diff_ln`v'[1]
}

* Part of the change in relative earnings not accounted for by p and h.
gen res = ddlnw - ddlnp - ddlnh

* Figure (U.S.): change since the first row in relative earnings, relative h,
* relative price, and the residual. Stored in memory as us_college_premium.
twoway ///
	(scatter ddlnw year, connect(l) msymbol(d) msize(medium) ///
		mcolor(black) lcolor(black)) ///
	(scatter ddlnh year, connect(l) lpattern(dash) msymbol(oh) msize(medium) ///
		mcolor(blue) lcolor(blue)) ///
	(scatter ddlnp year, connect(l) lpattern(dash) msymbol(th) msize(medium) ///
		mcolor(red) lcolor(red)) ///
	(scatter res year, connect(l) lpattern(-.) msymbol(x) msize(medium) ///
		mcolor(gs8) lcolor(gs8)), ///
	title("U.S.", size(large) color(black)) ///
	xtitle("Year", size(large)) ///
	xlabel(1985(5)2012, labsize(medlarge)) ///
	ylabel(-.2(.1).4, labsize(medlarge)) yscale(range(0 0.41)) ///
	legend(order(1 "Relative Earnings" 4 "Residual" ///
		2 "Relative Human Capital Quantity" 3 "Relative Human Capital Price") ///
		rows(2) pos(6)) ///
	name(us_college_premium, replace)

* Contributions of relative h and relative L to the relative price, each scaled
* by -1/sigma; contrib_A is what remains of ddlnp after removing both.
foreach v in h L {
	gen contrib_`v' = - 1/$sigma * ddln`v'
}
gen contrib_A = ddlnp - contrib_h - contrib_L

* Figure (U.S.): relative price and its three components.
* Stored in memory as us_skill_bias.
twoway ///
	(scatter ddlnp year, connect(l) msymbol(t) msize(medium) ///
		mcolor(red) lcolor(red)) ///
	(scatter contrib_L year, connect(l) lpattern(dash) msymbol(x) msize(medium) ///
		mcolor(green) lcolor(green)) ///
	(scatter contrib_h year, connect(l) lpattern(dash) msymbol(oh) msize(medium) ///
		mcolor(blue) lcolor(blue)) ///
	(scatter contrib_A year, connect(l) lpattern(dash) msymbol(dh) msize(medium) ///
		mcolor(orange) lcolor(orange)), ///
	title("U.S.", size(large) color(black)) ///
	xtitle("Year", size(large)) ///
	xlabel(1985(5)2012, labsize(medlarge)) ///
	ylabel(-.2(.1).2, labsize(medlarge)) ///
	legend(order(1 "Relative Human Capital Price" 2 "Relative Labor Supply" ///
		3 "Relative Human Capital Quantity" 4 "Skill-Biased Technological Change") ///
		rows(2) pos(6)) ///
	name(us_skill_bias, replace)

*** sensitivity to different values of sigma
* contrib_A_k recomputes the residual term with the k-th value of the grid
* below in place of the baseline sigma (k = 1 is 1.4, k = 2..5 are 2..5).
local sigma_grid 1.4 2 3 4 5
forvalues k = 1/5 {
	local s : word `k' of `sigma_grid'
	gen contrib_A_`k' = ddlnp + 1/`s' * ddlnh + 1/`s' * ddlnL
}

* Figure (U.S.): the series for sigma = 1.4, 2, 3 and 5; the sigma = 4 series
* is computed but not plotted. Stored in memory as us_robustness.
twoway ///
	(scatter contrib_A_1 year, connect(l) msymbol(t) msize(medium) ///
		mcolor(black) lcolor(black)) ///
	(scatter contrib_A_2 year, connect(l) lpattern(dash) msymbol(x) msize(medium) ///
		mcolor(black*0.8) lcolor(black*0.8)) ///
	(scatter contrib_A_3 year, connect(l) lpattern(dash) msymbol(oh) msize(medium) ///
		mcolor(black*0.5) lcolor(black*0.5)) ///
	(scatter contrib_A_5 year, connect(l) lpattern(dash) msymbol(dh) msize(medium) ///
		mcolor(black*0.2) lcolor(black*0.2)), ///
	title("U.S.", size(large) color(black)) ///
	xtitle("Year", size(large)) ///
	xlabel(1985(5)2012, labsize(medlarge)) ///
	ylabel(-0.2(.2)0.6, labsize(medlarge)) ///
	legend(order(1 "σ=1.4" 2 "σ=2" 3 "σ=3" 4 "σ=5") ///
		rows(1) pos(6)) ///
	name(us_robustness, replace)

/*******************************************************************************
								China
*******************************************************************************/
* Combine, by year: mean log earnings and counts (lnw0/1, L0/1), the constructed
* h0/h1 series, and the estimated year profiles for each education group.
* Unmatched rows from either side are retained, as before.
* Paths use forward slashes so they resolve on every platform.
use "temp/wage_cn_byedu.dta", clear
merge 1:1 year using "temp/construct_h_cn_byedu.dta", nogenerate
gen plot_year = year
* The full variable name is requested so the merge does not depend on
* variable-name abbreviation being enabled.
forvalues e = 0/1 {
	merge 1:1 plot_year using "temp/cn_year_profile_edu`e'.dta", keepusing(profile_year_cn_`e') nogenerate
}

* Logs of the year profile (p), the count (L) and h for each education group.
forvalues e = 0/1 {
	gen lnp`e' = log(profile_year_cn_`e')
	gen lnL`e' = log(L`e')
	gen lnh`e' = log(h`e')
}

* College minus non-college log gap for each component.
foreach v in w p h L {
	gen diff_ln`v' = ln`v'1 - ln`v'0
}

* Express each gap relative to its value in the first row. Sort explicitly so
* that the first row is the earliest year regardless of the order left by the
* merges (rows with a missing year sort last).
sort year
foreach v in w p h L {
	gen ddln`v' = diff_ln`v' - diff_ln`v'[1]
}

* Part of the change in relative earnings not accounted for by p and h.
gen res = ddlnw - ddlnp - ddlnh

* Figure (China): change since the first row in relative earnings, relative h,
* relative price, and the residual. Stored in memory as cn_college_premium.
twoway ///
	(scatter ddlnw year, connect(l) msymbol(d) msize(medium) ///
		mcolor(black) lcolor(black)) ///
	(scatter ddlnh year, connect(l) lpattern(dash) msymbol(oh) msize(medium) ///
		mcolor(blue) lcolor(blue)) ///
	(scatter ddlnp year, connect(l) lpattern(dash) msymbol(th) msize(medium) ///
		mcolor(red) lcolor(red)) ///
	(scatter res year, connect(l) lpattern(-.) msymbol(x) msize(medium) ///
		mcolor(gs8) lcolor(gs8)), ///
	title("China", size(large) color(black)) ///
	xtitle("Year", size(large)) ///
	xlabel(1985(5)2012, labsize(medlarge)) ///
	ylabel(-.2(.1).4, labsize(medlarge)) yscale(range(0 0.41)) ///
	legend(order(1 "Relative Earnings" 4 "Residual" ///
		2 "Relative Human Capital Quantity" 3 "Relative Human Capital Price") ///
		rows(2) pos(6)) ///
	name(cn_college_premium, replace)

* Contributions of relative h and relative L to the relative price, each scaled
* by -1/sigma; contrib_A is what remains of ddlnp after removing both.
foreach v in h L {
	gen contrib_`v' = - 1/$sigma * ddln`v'
}
gen contrib_A = ddlnp - contrib_h - contrib_L

* Figure (China): relative price and its three components.
* Stored in memory as cn_skill_bias.
twoway ///
	(scatter ddlnp year, connect(l) msymbol(t) msize(medium) ///
		mcolor(red) lcolor(red)) ///
	(scatter contrib_L year, connect(l) lpattern(dash) msymbol(x) msize(medium) ///
		mcolor(green) lcolor(green)) ///
	(scatter contrib_h year, connect(l) lpattern(dash) msymbol(oh) msize(medium) ///
		mcolor(blue) lcolor(blue)) ///
	(scatter contrib_A year, connect(l) lpattern(dash) msymbol(dh) msize(medium) ///
		mcolor(orange) lcolor(orange)), ///
	title("China", size(large) color(black)) ///
	xtitle("Year", size(large)) ///
	xlabel(1985(5)2012, labsize(medlarge)) ///
	ylabel(-0.6(.3)0.6, labsize(medlarge)) ///
	legend(order(1 "Relative Human Capital Price" 2 "Relative Labor Supply" ///
		3 "Relative Human Capital Quantity" 4 "Skill-Biased Technological Change") ///
		rows(2) pos(6)) ///
	name(cn_skill_bias, replace)

* Combine the U.S. and China panels side by side with a shared legend and
* export each combined figure as PDF. grc1leg is a user-written command and
* must be installed. The graph to export is named explicitly so the export does
* not depend on which graph window happens to be current, and the paths use
* forward slashes so they resolve on every platform.
grc1leg us_college_premium cn_college_premium, name(combine_college_premium, replace) row(1)
graph export "figures/decompose_college_premium.pdf", name(combine_college_premium) as(pdf) replace

grc1leg us_skill_bias cn_skill_bias, name(combine_skill_bias, replace) row(1)
graph export "figures/SBTC.pdf", name(combine_skill_bias) as(pdf) replace
